#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created by master on 2017/3/15
import os
import urllib.request
import re

page = 1
url = 'http://www.qiushibaike.com/hot/page/' + str(page)
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}

# Seconds to wait on the network before giving up instead of hanging forever.
TIMEOUT_SECONDS = 10

# One match per post block. The five capture groups are, in order:
#   0: text between the <img ...> tag and </a> inside the 'author"' div
#   1: text between 'content">' and the following '<!--'
#   2: body of that HTML comment (between '<!--' and '-->')
#   3: markup between the closing </div> and '<div class="stats'
#   4: text of the first 'class="number"' element after the stats div opens
# re.S lets '.' span newlines, since each post covers several lines of HTML.
ITEM_PATTERN = re.compile(
    '<div.*?author">.*?<a.*?<img.*?>(.*?)</a>.*?<div.*?'
    'content">(.*?)<!--(.*?)-->.*?</div>(.*?)<div class="stats.*?class="number">(.*?)</i>',
    re.S,
)


def fetch_page(page_url, request_headers, timeout=TIMEOUT_SECONDS):
    """Download *page_url* and return its body decoded as UTF-8.

    Args:
        page_url: URL to request.
        request_headers: Mapping of HTTP header names to values sent with
            the request.
        timeout: Seconds to wait for blocking network operations.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    request = urllib.request.Request(page_url, headers=request_headers)
    # The context manager closes the response even if read/decode fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode('utf-8')


def parse_items(html):
    """Return every ITEM_PATTERN match in *html* as a list of 5-tuples.

    Each tuple holds the pattern's capture groups in order; see the comment
    on ITEM_PATTERN for what each position contains. An input with no
    matching post yields an empty list.
    """
    return ITEM_PATTERN.findall(html)


def main():
    """Fetch the configured page, print it, then print the parsed fields.

    For each match, groups 0, 1, 2 and 4 are printed on one line. Group 3
    (the markup between the content and the stats div) is captured but not
    printed.
    """
    # Top-level boundary of a best-effort script: any failure (network,
    # decoding, console encoding) is reported as a one-line message.
    try:
        content = fetch_page(url, headers)
        # Print the raw page before the parsed fields.
        print(content)
        for item in parse_items(content):
            print(item[0], item[1], item[2], item[4])
    except Exception as e:
        print(e)


if __name__ == '__main__':
    main()
